import eia_api as api
import eia_etl as etl
import eia_data
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as pxEIA API - Data Backfill
Load libraries
API Settings:
raw_json = open("../metadata/series.json")
meta_json = json.load(raw_json)
series = pd.DataFrame(meta_json["series"])
api_path = meta_json["api_path"]
facets_template = {
"parent" : None,
"subba" : None
}
start = datetime.datetime(2018, 6, 20, 1)
end = datetime.datetime(2024, 2, 18, 1)
offset = 2250
eia_api_key = os.getenv('EIA_API_KEY')
meta_path = "../metadata/ciso_log_py.csv"
data_path = "../csv/ciso_grid_py.csv"api_metadata = api.eia_metadata(api_key = eia_api_key, api_path = api_path)
print(api_metadata.meta["endPeriod"])
end = pd.to_datetime(api_metadata.meta["endPeriod"])
print(end)2024-02-22T08
2024-02-22 08:00:00
meta_obj = eia_data.get_metadata(api_key = eia_api_key, api_path = api_path, meta_path = meta_path, series = series)m = meta_obj.request_meta
index = meta_obj.last_index + 1
data = None
for i in m.index:
facets = facets_template
facets["parent"] = m.at[i, "parent"]
facets["subba"] = m.at[i, "subba"]
start = m.at[i, "request_start"]
end = m.at[i, "end"]
print(facets)
if m.at[i, "updates_available"]:
temp = api.eia_backfile(api_key = eia_api_key,
api_path = api_path+ "data",
facets = facets,
start = start.to_pydatetime(),
end = end.to_pydatetime(),
offset = offset)
ts_obj = pd.DataFrame(np.arange(start = start, stop = end + datetime.timedelta(hours = 1), step = datetime.timedelta(hours = 1)).astype(datetime.datetime), columns=["index"])
ts_obj = ts_obj.merge(temp.data, left_on = "index", right_on = "period", how="left")
ts_obj.drop("period", axis = 1, inplace= True)
ts_obj = ts_obj.rename(columns= {"index": "period"})
else:
ts_obj = None
print("No new data is available")
meta_temp = eia_data.create_metadata(data = ts_obj, start = start, end = end, type = "refresh")
if ts_obj is None:
meta_temp["parent"] = m.at[i, "parent"]
meta_temp["subba"] = m.at[i, "subba"]
if meta_temp["success"]:
d = eia_data.append_data(data_path = data_path, new_data = ts_obj, save = True)
meta_temp["update"] = True
else:
meta_temp["update"] = False
meta_temp["comments"] = meta_temp["comments"] + "The data refresh failed, please check the log; "
meta_df = pd.DataFrame([meta_temp])
if data is None:
data = ts_obj
else:
data = data._append(ts_obj)
if i == series.index.start:
meta_new = meta_df
else:
meta_new = meta_new._append(meta_df){'parent': 'CISO', 'subba': 'PGAE'}
Save the data to CSV file
{'parent': 'CISO', 'subba': 'SCE'}
Save the data to CSV file
{'parent': 'CISO', 'subba': 'SDGE'}
Save the data to CSV file
{'parent': 'CISO', 'subba': 'VEA'}
Save the data to CSV file
print(meta_new)
meta_updated = eia_data.append_metadata(meta_path = meta_path, meta = meta_new, save = True, init = False)
print(meta_updated) index parent subba time start \
0 None CISO PGAE 2024-02-23 03:34:54.866756+00:00 2024-02-18 02:00:00
0 None CISO SCE 2024-02-23 03:34:56.421798+00:00 2024-02-18 02:00:00
0 None CISO SDGE 2024-02-23 03:34:57.735037+00:00 2024-02-18 02:00:00
0 None CISO VEA 2024-02-23 03:34:59.082815+00:00 2024-02-18 02:00:00
end start_act end_act start_match \
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00 True
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00 True
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00 True
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00 True
end_match n_obs na type update success comments
0 True 103 0 refresh True True
0 True 103 0 refresh True True
0 True 103 0 refresh True True
0 True 103 0 refresh True True
index parent subba time start \
0 1 CISO PGAE 2024-02-23 03:32:21.312337+00:00 2018-07-01 08:00:00
1 1 CISO SCE 2024-02-23 03:32:36.118749+00:00 2018-07-01 08:00:00
2 1 CISO SDGE 2024-02-23 03:32:51.011954+00:00 2018-07-01 08:00:00
3 1 CISO VEA 2024-02-23 03:33:06.089176+00:00 2018-07-01 08:00:00
0 2 CISO PGAE 2024-02-23 03:34:54.866756+00:00 2024-02-18 02:00:00
0 2 CISO SCE 2024-02-23 03:34:56.421798+00:00 2024-02-18 02:00:00
0 2 CISO SDGE 2024-02-23 03:34:57.735037+00:00 2024-02-18 02:00:00
0 2 CISO VEA 2024-02-23 03:34:59.082815+00:00 2024-02-18 02:00:00
end start_act end_act start_match \
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
1 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
2 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
3 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00 True
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00 True
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00 True
0 2024-02-22 08:00:00 2024-02-18 02:00:00 2024-02-22 08:00:00 True
end_match n_obs na type update success \
0 True 49386 98 backfile True True
1 True 49386 98 backfile True True
2 True 49386 98 backfile True True
3 True 49386 98 backfile True True
0 True 103 0 refresh True True
0 True 103 0 refresh True True
0 True 103 0 refresh True True
0 True 103 0 refresh True True
comments
0 Missing values were found;
1 Missing values were found;
2 Missing values were found;
3 Missing values were found;
0
0
0
0
Plot the Series
We will use Plotly to visualize the series:
if data is not None:
d = data.sort_values(by = ["subba", "period"])
p = px.line(data, x="period", y="value", color="subba")
p.show()
else:
print("No new data is available")full_data = pd.read_csv(data_path)
full_data.head()
full_data["period"] = pd.to_datetime(full_data["period"])
p = px.line(full_data, x="period", y="value", color="subba")
p.show()